From 0c914c9072317d31a29ad52bd59dcdfa798090e8 Mon Sep 17 00:00:00 2001 From: "kaf24@viper.(none)" Date: Tue, 1 Feb 2005 23:14:05 +0000 Subject: [PATCH] bitkeeper revision 1.1159.235.1 (42000d3dwcPyT8aY4VIPYGCfCAJuQQ) More x86/64. Status: traps.c now included in the build, but actual building of IDT doesn't happen, and we need some sort of entry.S. More page-table building required so that arch_init_memory() can work. And there is something odd with MP-table parsing; I currently suspect that __init sections are causing problems. Signed-off-by: keir.fraser@cl.cam.ac.uk --- .rootkeys | 2 + xen/arch/x86/boot/x86_64.S | 21 +-- xen/arch/x86/memory.c | 2 + xen/arch/x86/traps.c | 280 +++--------------------------- xen/arch/x86/x86_32/traps.c | 226 ++++++++++++++++++++++++ xen/arch/x86/x86_64/traps.c | 127 ++++++++++++++ xen/include/asm-x86/desc.h | 43 ++++- xen/include/asm-x86/regs.h | 2 + xen/include/asm-x86/x86_64/regs.h | 13 +- 9 files changed, 433 insertions(+), 283 deletions(-) create mode 100644 xen/arch/x86/x86_32/traps.c create mode 100644 xen/arch/x86/x86_64/traps.c diff --git a/.rootkeys b/.rootkeys index 2d09667074..a00f425304 100644 --- a/.rootkeys +++ b/.rootkeys @@ -901,11 +901,13 @@ 3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S 3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/x86_32/mm.c 40f92331jfOlE7MfKwpdkEb1CEf23g xen/arch/x86/x86_32/seg_fixup.c +42000d3ckiFc1qxa4AWqsd0t3lxuyw xen/arch/x86/x86_32/traps.c 3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/x86_32/usercopy.c 3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/x86_32/xen.lds 41bf1717Ty3hwN3E9swdu8QfnvGqww xen/arch/x86/x86_64/asm-offsets.c 40e96d3aLDI-nViMuYneD7VKYlZrVg xen/arch/x86/x86_64/entry.S 41bf1717XhPz_dNT5OKSjgmbFuWBuA xen/arch/x86/x86_64/mm.c +42000d3cMb8o1WuFBXC07c8i3lPZBw xen/arch/x86/x86_64/traps.c 40e96d3ahBTZqbTViInnq0lM03vs7A xen/arch/x86/x86_64/usercopy.c 40e96d3akN3Hu_J5Bk-WXD8OGscrYQ xen/arch/x86/x86_64/xen.lds 3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen/common/Makefile diff --git 
a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S index af79710199..f1bc4ece46 100644 --- a/xen/arch/x86/boot/x86_64.S +++ b/xen/arch/x86/boot/x86_64.S @@ -241,26 +241,17 @@ ENTRY(cpu0_stack) # Initial stack is 8kB ENTRY(stext) ENTRY(_stext) -.globl ret_from_intr, copy_to_user, set_intr_gate, die +.globl switch_to, ret_from_intr, do_iopl +switch_to: ret_from_intr: -copy_to_user: -set_intr_gate: -die: -.globl copy_from_user, show_registers, do_iopl -copy_from_user: -show_registers: do_iopl: -.globl idt_table, copy_user_generic, idt_tables, new_thread -idt_table: +.globl copy_from_user, copy_to_user, copy_user_generic, new_thread +copy_from_user: +copy_to_user: copy_user_generic: -idt_tables: new_thread: -.globl switch_to, __get_user_1, __get_user_4, __get_user_8, trap_init -switch_to: +.globl __get_user_1, __get_user_4, __get_user_8 __get_user_1: __get_user_4: __get_user_8: -trap_init: -.globl set_debugreg -set_debugreg: diff --git a/xen/arch/x86/memory.c b/xen/arch/x86/memory.c index 668a8c4af4..b221cb269d 100644 --- a/xen/arch/x86/memory.c +++ b/xen/arch/x86/memory.c @@ -168,6 +168,7 @@ void __init init_frametable(void) void arch_init_memory(void) { +#ifdef __i386__ unsigned long i; /* @@ -219,6 +220,7 @@ void arch_init_memory(void) frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1; frame_table[m2p_start_mfn+i].u.inuse.domain = dom_xen; } +#endif } static void __invalidate_shadow_ldt(struct exec_domain *d) diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index eafb1bc669..773f6be78e 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -66,12 +66,6 @@ char opt_nmi[10] = "fatal"; #endif string_param("nmi", opt_nmi); -#define GUEST_FAULT(_r) (likely(VM86_MODE(_r) || !RING_0(_r))) - -#define DOUBLEFAULT_STACK_SIZE 1024 -static struct tss_struct doublefault_tss; -static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE]; - asmlinkage int hypercall(void); /* Master table, and the one used by CPU0. 
*/ @@ -99,116 +93,6 @@ asmlinkage void alignment_check(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); -int kstack_depth_to_print = 8*20; - -static inline int kernel_text_address(unsigned long addr) -{ - if (addr >= (unsigned long) &_stext && - addr <= (unsigned long) &_etext) - return 1; - return 0; - -} - -void show_guest_stack(void) -{ - int i; - execution_context_t *ec = get_execution_context(); - unsigned long *stack = (unsigned long *)ec->esp; - printk("Guest EIP is %lx\n",ec->eip); - - for ( i = 0; i < kstack_depth_to_print; i++ ) - { - if ( ((long)stack & (STACK_SIZE-1)) == 0 ) - break; - if ( i && ((i % 8) == 0) ) - printk("\n "); - printk("%08lx ", *stack++); - } - printk("\n"); - -} - -void show_trace(unsigned long *esp) -{ - unsigned long *stack, addr; - int i; - - printk("Call Trace from ESP=%p: ", esp); - stack = esp; - i = 0; - while (((long) stack & (STACK_SIZE-1)) != 0) { - addr = *stack++; - if (kernel_text_address(addr)) { - if (i && ((i % 6) == 0)) - printk("\n "); - printk("[<%08lx>] ", addr); - i++; - } - } - printk("\n"); -} - -void show_stack(unsigned long *esp) -{ - unsigned long *stack; - int i; - - printk("Stack trace from ESP=%p:\n", esp); - - stack = esp; - for ( i = 0; i < kstack_depth_to_print; i++ ) - { - if ( ((long)stack & (STACK_SIZE-1)) == 0 ) - break; - if ( i && ((i % 8) == 0) ) - printk("\n "); - if ( kernel_text_address(*stack) ) - printk("[%08lx] ", *stack++); - else - printk("%08lx ", *stack++); - } - printk("\n"); - - show_trace( esp ); -} - -void show_registers(struct xen_regs *regs) -{ - unsigned long esp; - unsigned short ss, ds, es, fs, gs; - - if ( GUEST_FAULT(regs) ) - { - esp = regs->esp; - ss = regs->ss & 0xffff; - ds = regs->ds & 0xffff; - es = regs->es & 0xffff; - fs = regs->fs & 0xffff; - gs = regs->gs & 0xffff; - } - else - { - esp = (unsigned long)(&regs->esp); - ss = __HYPERVISOR_DS; - ds = __HYPERVISOR_DS; - es = __HYPERVISOR_DS; - fs = __HYPERVISOR_DS; - gs = 
__HYPERVISOR_DS; - } - - printk("CPU: %d\nEIP: %04lx:[<%08lx>] \nEFLAGS: %08lx\n", - smp_processor_id(), 0xffff & regs->cs, regs->eip, regs->eflags); - printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); - printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", - ds, es, fs, gs, ss); - - show_stack((unsigned long *)&regs->esp); -} - /* * This is called for faults at very unexpected times (e.g., when interrupts * are disabled). In such situations we can't do much that is safe. We try to @@ -231,7 +115,7 @@ asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs) if ( trapnr == TRAP_page_fault ) { - __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : ); + __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : ); printk("Faulting linear address might be %08lx\n", cr2); } @@ -344,38 +228,6 @@ asmlinkage int do_int3(struct xen_regs *regs) return 0; } -asmlinkage void do_double_fault(void) -{ - struct tss_struct *tss = &doublefault_tss; - unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1; - - /* Disable the NMI watchdog. It's useless now. */ - watchdog_on = 0; - - /* Find information saved during fault and dump it to the console. 
*/ - tss = &init_tss[cpu]; - printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n", - cpu, tss->cs, tss->eip, tss->eflags); - printk("CR3: %08x\n", tss->__cr3); - printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n", - tss->eax, tss->ebx, tss->ecx, tss->edx); - printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n", - tss->esi, tss->edi, tss->ebp, tss->esp); - printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", - tss->ds, tss->es, tss->fs, tss->gs, tss->ss); - printk("************************************\n"); - printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu); - printk("System needs manual reset.\n"); - printk("************************************\n"); - - /* Lock up the console to prevent spurious output from other CPUs. */ - console_force_lock(); - - /* Wait for manual reset. */ - for ( ; ; ) - __asm__ __volatile__ ( "hlt" ); -} - asmlinkage void do_machine_check(struct xen_regs *regs) { fatal_trap(TRAP_machine_check, regs); @@ -408,7 +260,7 @@ asmlinkage int do_page_fault(struct xen_regs *regs) int cpu = ed->processor; int ret; - __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : ); + __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : ); DEBUGGER_trap_entry(TRAP_page_fault, regs); @@ -477,6 +329,7 @@ asmlinkage int do_page_fault(struct xen_regs *regs) DEBUGGER_trap_fatal(TRAP_page_fault, regs); +#ifdef __i386__ if ( addr >= PAGE_OFFSET ) { unsigned long page; @@ -493,6 +346,7 @@ asmlinkage int do_page_fault(struct xen_regs *regs) printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? 
--\n"); #endif } +#endif /* __i386__ */ show_registers(regs); panic("CPU%d FATAL PAGE FAULT\n" @@ -542,7 +396,7 @@ static int emulate_privileged_op(struct xen_regs *regs) eip += 1; if ( (opcode & 0xc0) != 0xc0 ) goto fail; - reg = decode_reg(regs, opcode); + reg = decode_reg(regs, opcode & 7); switch ( (opcode >> 3) & 7 ) { case 0: /* Read CR0 */ @@ -570,7 +424,7 @@ static int emulate_privileged_op(struct xen_regs *regs) eip += 1; if ( (opcode & 0xc0) != 0xc0 ) goto fail; - reg = decode_reg(regs, opcode); + reg = decode_reg(regs, opcode & 7); switch ( (opcode >> 3) & 7 ) { case 0: /* Write CR0 */ @@ -629,7 +483,6 @@ static int emulate_privileged_op(struct xen_regs *regs) asmlinkage int do_general_protection(struct xen_regs *regs) { struct exec_domain *ed = current; - struct domain *d = ed->domain; struct trap_bounce *tb = &ed->thread.trap_bounce; trap_info_t *ti; unsigned long fixup; @@ -681,7 +534,7 @@ asmlinkage int do_general_protection(struct xen_regs *regs) return 0; #if defined(__i386__) - if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments) && + if ( VM_ASSIST(ed->domain, VMASST_TYPE_4gb_segments) && (regs->error_code == 0) && gpf_emulate_4gb(regs) ) return 0; @@ -791,19 +644,19 @@ asmlinkage int math_state_restore(struct xen_regs *regs) asmlinkage int do_debug(struct xen_regs *regs) { - unsigned int condition; + unsigned long condition; struct exec_domain *d = current; struct trap_bounce *tb = &d->thread.trap_bounce; DEBUGGER_trap_entry(TRAP_debug, regs); - __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); + __asm__ __volatile__("mov %%db6,%0" : "=r" (condition)); /* Mask out spurious debug traps due to lazy DR7 setting */ if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) && (d->thread.debugreg[7] == 0) ) { - __asm__("movl %0,%%db7" : : "r" (0)); + __asm__("mov %0,%%db7" : : "r" (0UL)); goto out; } @@ -836,30 +689,17 @@ asmlinkage int do_spurious_interrupt_bug(struct xen_regs *regs) return EXCRET_not_a_fault; } -#define 
_set_gate(gate_addr,type,dpl,addr) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ - "movw %4,%%dx\n\t" \ - "movl %%eax,%0\n\t" \ - "movl %%edx,%1" \ - :"=m" (*((long *) (gate_addr))), \ - "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ - :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ - "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \ -} while (0) - void set_intr_gate(unsigned int n, void *addr) { _set_gate(idt_table+n,14,0,addr); } -static void __init set_system_gate(unsigned int n, void *addr) +void set_system_gate(unsigned int n, void *addr) { _set_gate(idt_table+n,14,3,addr); } -static void set_task_gate(unsigned int n, unsigned int sel) +void set_task_gate(unsigned int n, unsigned int sel) { idt_table[n].a = sel << 16; idt_table[n].b = 0x8500; @@ -875,17 +715,6 @@ static void set_task_gate(unsigned int n, unsigned int sel) *(gate_addr) = (((base) & 0x0000ffff)<<16) | \ ((limit) & 0x0ffff); } -#define _set_tssldt_desc(n,addr,limit,type) \ -__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ - "movw %%ax,2(%2)\n\t" \ - "rorl $16,%%eax\n\t" \ - "movb %%al,4(%2)\n\t" \ - "movb %4,5(%2)\n\t" \ - "movb $0,6(%2)\n\t" \ - "movb %%ah,7(%2)\n\t" \ - "rorl $16,%%eax" \ - : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type)) - void set_tss_desc(unsigned int n, void *addr) { _set_tssldt_desc( @@ -897,25 +726,10 @@ void set_tss_desc(unsigned int n, void *addr) void __init trap_init(void) { - /* - * Make a separate task for double faults. This will get us debug output if - * we blow the kernel stack. 
- */ - struct tss_struct *tss = &doublefault_tss; - memset(tss, 0, sizeof(*tss)); - tss->ds = __HYPERVISOR_DS; - tss->es = __HYPERVISOR_DS; - tss->ss = __HYPERVISOR_DS; - tss->esp = (unsigned long) - &doublefault_stack[DOUBLEFAULT_STACK_SIZE]; - tss->__cr3 = __pa(idle_pg_table); - tss->cs = __HYPERVISOR_CS; - tss->eip = (unsigned long)do_double_fault; - tss->eflags = 2; - tss->bitmap = IOBMP_INVALID_OFFSET; - _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY, - (int)tss, 235, 0x89); + extern void doublefault_init(void); + doublefault_init(); +#ifdef __i386__ /* * Note that interrupt gates are always used, rather than trap gates. We * must have interrupts disabled until DS/ES/FS/GS are saved because the @@ -948,6 +762,7 @@ void __init trap_init(void) /* Only ring 1 can access Xen services. */ _set_gate(idt_table+HYPERCALL_VECTOR,14,1,&hypercall); +#endif /* CPU0 uses the master IDT. */ idt_tables[0] = idt_table; @@ -1015,57 +830,6 @@ long do_set_callbacks(unsigned long event_selector, } -long set_fast_trap(struct exec_domain *p, int idx) -{ - trap_info_t *ti; - - /* Index 0 is special: it disables fast traps. */ - if ( idx == 0 ) - { - if ( p == current ) - CLEAR_FAST_TRAP(&p->thread); - SET_DEFAULT_FAST_TRAP(&p->thread); - return 0; - } - - /* - * We only fast-trap vectors 0x20-0x2f, and vector 0x80. - * The former range is used by Windows and MS-DOS. - * Vector 0x80 is used by Linux and the BSD variants. - */ - if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) ) - return -1; - - ti = p->thread.traps + idx; - - /* - * We can't virtualise interrupt gates, as there's no way to get - * the CPU to automatically clear the events_mask variable. 
- */ - if ( TI_GET_IF(ti) ) - return -1; - - if ( p == current ) - CLEAR_FAST_TRAP(&p->thread); - - p->thread.fast_trap_idx = idx; - p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff); - p->thread.fast_trap_desc.b = - (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13; - - if ( p == current ) - SET_FAST_TRAP(&p->thread); - - return 0; -} - - -long do_set_fast_trap(int idx) -{ - return set_fast_trap(current, idx); -} - - long do_fpu_taskswitch(void) { set_bit(EDF_GUEST_STTS, &current->ed_flags); @@ -1083,22 +847,22 @@ long set_debugreg(struct exec_domain *p, int reg, unsigned long value) case 0: if ( value > (PAGE_OFFSET-4) ) return -EPERM; if ( p == current ) - __asm__ ( "movl %0, %%db0" : : "r" (value) ); + __asm__ ( "mov %0, %%db0" : : "r" (value) ); break; case 1: if ( value > (PAGE_OFFSET-4) ) return -EPERM; if ( p == current ) - __asm__ ( "movl %0, %%db1" : : "r" (value) ); + __asm__ ( "mov %0, %%db1" : : "r" (value) ); break; case 2: if ( value > (PAGE_OFFSET-4) ) return -EPERM; if ( p == current ) - __asm__ ( "movl %0, %%db2" : : "r" (value) ); + __asm__ ( "mov %0, %%db2" : : "r" (value) ); break; case 3: if ( value > (PAGE_OFFSET-4) ) return -EPERM; if ( p == current ) - __asm__ ( "movl %0, %%db3" : : "r" (value) ); + __asm__ ( "mov %0, %%db3" : : "r" (value) ); break; case 6: /* @@ -1108,7 +872,7 @@ long set_debugreg(struct exec_domain *p, int reg, unsigned long value) value &= 0xffffefff; /* reserved bits => 0 */ value |= 0xffff0ff0; /* reserved bits => 1 */ if ( p == current ) - __asm__ ( "movl %0, %%db6" : : "r" (value) ); + __asm__ ( "mov %0, %%db6" : : "r" (value) ); break; case 7: /* @@ -1129,7 +893,7 @@ long set_debugreg(struct exec_domain *p, int reg, unsigned long value) if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM; } if ( p == current ) - __asm__ ( "movl %0, %%db7" : : "r" (value) ); + __asm__ ( "mov %0, %%db7" : : "r" (value) ); break; default: return -EINVAL; diff --git a/xen/arch/x86/x86_32/traps.c 
b/xen/arch/x86/x86_32/traps.c new file mode 100644 index 0000000000..edb63a67fb --- /dev/null +++ b/xen/arch/x86/x86_32/traps.c @@ -0,0 +1,226 @@ + +#include +#include +#include +#include +#include +#include +#include + +static int kstack_depth_to_print = 8*20; + +static inline int kernel_text_address(unsigned long addr) +{ + if (addr >= (unsigned long) &_stext && + addr <= (unsigned long) &_etext) + return 1; + return 0; + +} + +void show_guest_stack(void) +{ + int i; + execution_context_t *ec = get_execution_context(); + unsigned long *stack = (unsigned long *)ec->esp; + printk("Guest EIP is %lx\n",ec->eip); + + for ( i = 0; i < kstack_depth_to_print; i++ ) + { + if ( ((long)stack & (STACK_SIZE-1)) == 0 ) + break; + if ( i && ((i % 8) == 0) ) + printk("\n "); + printk("%08lx ", *stack++); + } + printk("\n"); + +} + +void show_trace(unsigned long *esp) +{ + unsigned long *stack, addr; + int i; + + printk("Call Trace from ESP=%p: ", esp); + stack = esp; + i = 0; + while (((long) stack & (STACK_SIZE-1)) != 0) { + addr = *stack++; + if (kernel_text_address(addr)) { + if (i && ((i % 6) == 0)) + printk("\n "); + printk("[<%08lx>] ", addr); + i++; + } + } + printk("\n"); +} + +void show_stack(unsigned long *esp) +{ + unsigned long *stack; + int i; + + printk("Stack trace from ESP=%p:\n", esp); + + stack = esp; + for ( i = 0; i < kstack_depth_to_print; i++ ) + { + if ( ((long)stack & (STACK_SIZE-1)) == 0 ) + break; + if ( i && ((i % 8) == 0) ) + printk("\n "); + if ( kernel_text_address(*stack) ) + printk("[%08lx] ", *stack++); + else + printk("%08lx ", *stack++); + } + printk("\n"); + + show_trace( esp ); +} + +void show_registers(struct xen_regs *regs) +{ + unsigned long esp; + unsigned short ss, ds, es, fs, gs; + + if ( GUEST_FAULT(regs) ) + { + esp = regs->esp; + ss = regs->ss & 0xffff; + ds = regs->ds & 0xffff; + es = regs->es & 0xffff; + fs = regs->fs & 0xffff; + gs = regs->gs & 0xffff; + } + else + { + esp = (unsigned long)(&regs->esp); + ss = __HYPERVISOR_DS; + ds = 
__HYPERVISOR_DS; + es = __HYPERVISOR_DS; + fs = __HYPERVISOR_DS; + gs = __HYPERVISOR_DS; + } + + printk("CPU: %d\nEIP: %04lx:[<%08lx>] \nEFLAGS: %08lx\n", + smp_processor_id(), 0xffff & regs->cs, regs->eip, regs->eflags); + printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", + ds, es, fs, gs, ss); + + show_stack((unsigned long *)&regs->esp); +} + +#define DOUBLEFAULT_STACK_SIZE 1024 +static struct tss_struct doublefault_tss; +static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE]; + +asmlinkage void do_double_fault(void) +{ + struct tss_struct *tss = &doublefault_tss; + unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1; + + /* Disable the NMI watchdog. It's useless now. */ + watchdog_on = 0; + + /* Find information saved during fault and dump it to the console. */ + tss = &init_tss[cpu]; + printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n", + cpu, tss->cs, tss->eip, tss->eflags); + printk("CR3: %08x\n", tss->__cr3); + printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n", + tss->eax, tss->ebx, tss->ecx, tss->edx); + printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n", + tss->esi, tss->edi, tss->ebp, tss->esp); + printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", + tss->ds, tss->es, tss->fs, tss->gs, tss->ss); + printk("************************************\n"); + printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu); + printk("System needs manual reset.\n"); + printk("************************************\n"); + + /* Lock up the console to prevent spurious output from other CPUs. */ + console_force_lock(); + + /* Wait for manual reset. */ + for ( ; ; ) + __asm__ __volatile__ ( "hlt" ); +} + +void __init doublefault_init(void) +{ + /* + * Make a separate task for double faults. This will get us debug output if + * we blow the kernel stack. 
+ */ + struct tss_struct *tss = &doublefault_tss; + memset(tss, 0, sizeof(*tss)); + tss->ds = __HYPERVISOR_DS; + tss->es = __HYPERVISOR_DS; + tss->ss = __HYPERVISOR_DS; + tss->esp = (unsigned long) + &doublefault_stack[DOUBLEFAULT_STACK_SIZE]; + tss->__cr3 = __pa(idle_pg_table); + tss->cs = __HYPERVISOR_CS; + tss->eip = (unsigned long)do_double_fault; + tss->eflags = 2; + tss->bitmap = IOBMP_INVALID_OFFSET; + _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY, + (int)tss, 235, 0x89); +} + +long set_fast_trap(struct exec_domain *p, int idx) +{ + trap_info_t *ti; + + /* Index 0 is special: it disables fast traps. */ + if ( idx == 0 ) + { + if ( p == current ) + CLEAR_FAST_TRAP(&p->thread); + SET_DEFAULT_FAST_TRAP(&p->thread); + return 0; + } + + /* + * We only fast-trap vectors 0x20-0x2f, and vector 0x80. + * The former range is used by Windows and MS-DOS. + * Vector 0x80 is used by Linux and the BSD variants. + */ + if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) ) + return -1; + + ti = p->thread.traps + idx; + + /* + * We can't virtualise interrupt gates, as there's no way to get + * the CPU to automatically clear the events_mask variable. 
+ */ + if ( TI_GET_IF(ti) ) + return -1; + + if ( p == current ) + CLEAR_FAST_TRAP(&p->thread); + + p->thread.fast_trap_idx = idx; + p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff); + p->thread.fast_trap_desc.b = + (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13; + + if ( p == current ) + SET_FAST_TRAP(&p->thread); + + return 0; +} + + +long do_set_fast_trap(int idx) +{ + return set_fast_trap(current, idx); +} diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c new file mode 100644 index 0000000000..f4e7f2626b --- /dev/null +++ b/xen/arch/x86/x86_64/traps.c @@ -0,0 +1,127 @@ + +#include +#include +#include +#include +#include +#include +#include + +static int kstack_depth_to_print = 8*20; + +static inline int kernel_text_address(unsigned long addr) +{ + if (addr >= (unsigned long) &_stext && + addr <= (unsigned long) &_etext) + return 1; + return 0; + +} + +void show_guest_stack(void) +{ + int i; + execution_context_t *ec = get_execution_context(); + unsigned long *stack = (unsigned long *)ec->rsp; + printk("Guest RIP is %lx\n", ec->rip); + + for ( i = 0; i < kstack_depth_to_print; i++ ) + { + if ( ((long)stack & (STACK_SIZE-1)) == 0 ) + break; + if ( i && ((i % 8) == 0) ) + printk("\n "); + printk("%08lx ", *stack++); + } + printk("\n"); + +} + +void show_trace(unsigned long *rsp) +{ + unsigned long *stack, addr; + int i; + + printk("Call Trace from RSP=%p: ", rsp); + stack = rsp; + i = 0; + while (((long) stack & (STACK_SIZE-1)) != 0) { + addr = *stack++; + if (kernel_text_address(addr)) { + if (i && ((i % 6) == 0)) + printk("\n "); + printk("[<%08lx>] ", addr); + i++; + } + } + printk("\n"); +} + +void show_stack(unsigned long *rsp) +{ + unsigned long *stack; + int i; + + printk("Stack trace from RSP=%p:\n", rsp); + + stack = rsp; + for ( i = 0; i < kstack_depth_to_print; i++ ) + { + if ( ((long)stack & (STACK_SIZE-1)) == 0 ) + break; + if ( i && ((i % 8) == 0) ) + printk("\n "); + if ( 
kernel_text_address(*stack) ) + printk("[%08lx] ", *stack++); + else + printk("%08lx ", *stack++); + } + printk("\n"); + + show_trace(rsp); +} + +void show_registers(struct xen_regs *regs) +{ + printk("CPU: %d\nEIP: %04lx:[<%08lx>] \nEFLAGS: %08lx\n", + smp_processor_id(), 0xffff & regs->cs, regs->rip, regs->eflags); + printk("rax: %08lx rbx: %08lx rcx: %08lx rdx: %08lx\n", + regs->rax, regs->rbx, regs->rcx, regs->rdx); + printk("rsi: %08lx rdi: %08lx rbp: %08lx rsp: %08lx ss: %04x\n", + regs->rsi, regs->rdi, regs->rbp, regs->rsp, regs->ss); + printk("r8: %08lx r9: %08lx r10: %08lx r11: %08lx\n", + regs->r8, regs->r9, regs->r10, regs->r11); + printk("r12: %08lx r13: %08lx r14: %08lx r15: %08lx\n", + regs->r12, regs->r13, regs->r14, regs->r15); + + show_stack((unsigned long *)regs->rsp); +} + +void __init doublefault_init(void) +{ +} + +void *decode_reg(struct xen_regs *regs, u8 b) +{ + switch ( b ) + { + case 0: return &regs->rax; + case 1: return &regs->rcx; + case 2: return &regs->rdx; + case 3: return &regs->rbx; + case 4: return &regs->rsp; + case 5: return &regs->rbp; + case 6: return &regs->rsi; + case 7: return &regs->rdi; + case 8: return &regs->r8; + case 9: return &regs->r9; + case 10: return &regs->r10; + case 11: return &regs->r11; + case 12: return &regs->r12; + case 13: return &regs->r13; + case 14: return &regs->r14; + case 15: return &regs->r15; + } + + return NULL; +} diff --git a/xen/include/asm-x86/desc.h b/xen/include/asm-x86/desc.h index af6dcc5dbb..ee8c7faa3b 100644 --- a/xen/include/asm-x86/desc.h +++ b/xen/include/asm-x86/desc.h @@ -1,5 +1,6 @@ #ifndef __ARCH_DESC_H #define __ARCH_DESC_H +#ifndef __ASSEMBLY__ #define LDT_ENTRY_SIZE 8 @@ -25,7 +26,6 @@ (((_s)>>3) > LAST_RESERVED_GDT_ENTRY) || \ ((_s)&4)) && \ (((_s)&3) == 1)) -#define VALID_CODESEL(_s) ((_s) == FLAT_RING1_CS || VALID_SEL(_s)) /* These are bitmasks for the high 32 bits of a descriptor table entry. 
*/ #define _SEGMENT_TYPE (15<< 8) @@ -38,17 +38,51 @@ #define _SEGMENT_DB ( 1<<22) /* 16- or 32-bit segment */ #define _SEGMENT_G ( 1<<23) /* Granularity */ -#ifndef __ASSEMBLY__ struct desc_struct { u32 a, b; }; #if defined(__x86_64__) + +#define VALID_CODESEL(_s) ((_s) == FLAT_RING3_CS64 || VALID_SEL(_s)) + typedef struct { u64 a, b; } idt_entry_t; + +#define _set_gate(gate_addr,type,dpl,addr) ((void)0) +#define _set_tssldt_desc(n,addr,limit,type) ((void)0) + #elif defined(__i386__) + +#define VALID_CODESEL(_s) ((_s) == FLAT_RING1_CS || VALID_SEL(_s)) + typedef struct desc_struct idt_entry_t; + +#define _set_gate(gate_addr,type,dpl,addr) \ +do { \ + int __d0, __d1; \ + __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ + "movw %4,%%dx\n\t" \ + "movl %%eax,%0\n\t" \ + "movl %%edx,%1" \ + :"=m" (*((long *) (gate_addr))), \ + "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ + :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ + "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \ +} while (0) + +#define _set_tssldt_desc(n,addr,limit,type) \ +__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ + "movw %%ax,2(%2)\n\t" \ + "rorl $16,%%eax\n\t" \ + "movb %%al,4(%2)\n\t" \ + "movb %4,5(%2)\n\t" \ + "movb $0,6(%2)\n\t" \ + "movb %%ah,7(%2)\n\t" \ + "rorl $16,%%eax" \ + : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type)) + #endif extern struct desc_struct gdt_table[]; @@ -64,8 +98,9 @@ struct Xgt_desc_struct { #define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2)) extern void set_intr_gate(unsigned int irq, void * addr); +extern void set_system_gate(unsigned int n, void *addr); +extern void set_task_gate(unsigned int n, unsigned int sel); extern void set_tss_desc(unsigned int n, void *addr); #endif /* !__ASSEMBLY__ */ - -#endif +#endif /* __ARCH_DESC_H */ diff --git a/xen/include/asm-x86/regs.h b/xen/include/asm-x86/regs.h index 5ec347c139..3a9f5edb02 100644 --- a/xen/include/asm-x86/regs.h +++ b/xen/include/asm-x86/regs.h @@ -31,4 +31,6 @@ enum EFLAGS { 
EF_ID = 0x00200000, /* id */ }; +#define GUEST_FAULT(_r) (likely(VM86_MODE(_r) || !RING_0(_r))) + #endif /* __X86_REGS_H__ */ diff --git a/xen/include/asm-x86/x86_64/regs.h b/xen/include/asm-x86/x86_64/regs.h index d1a2fa26a2..42b8b3c0b8 100644 --- a/xen/include/asm-x86/x86_64/regs.h +++ b/xen/include/asm-x86/x86_64/regs.h @@ -11,18 +11,19 @@ struct xen_regs u64 r12; u64 rbp; u64 rbx; + /* NB. Above here is C callee-saves. */ u64 r11; u64 r10; u64 r9; u64 r8; - u64 rax; - u64 rcx; - u64 rdx; - u64 rsi; - u64 rdi; + union { u64 rax; u32 eax; } __attribute__ ((packed)); + union { u64 rcx; u32 ecx; } __attribute__ ((packed)); + union { u64 rdx; u32 edx; } __attribute__ ((packed)); + union { u64 rsi; u32 esi; } __attribute__ ((packed)); + union { u64 rdi; u32 edi; } __attribute__ ((packed)); u32 error_code; u32 entry_vector; - u64 rip; + union { u64 rip; u64 eip; } __attribute__ ((packed)); u64 cs; u64 eflags; u64 rsp; -- 2.30.2